From c28478383c1a628d42866803d785e1f23baa33be Mon Sep 17 00:00:00 2001 From: Debarshi Ray Date: Thu, 21 Dec 2017 10:14:53 +0100 Subject: [PATCH] CIE: Use a faster cbrtf implementation This is the approximate cube root of an IEEE float implementation from Hacker's Delight. The elimination of all conditional branches probably makes it a better candidate for future SIMD accelerated code paths. On an Intel i7 Haswell, it now takes 0.27s to convert a 15 megapixel buffer from "RGBA float" to "CIE Lab alpha float" instead of the earlier 0.35s. A "Y float" to "CIE L float" conversion takes 0.085s instead of 0.102s. Original code: http://www.hackersdelight.org/hdcodetxt/acbrt.c.txt Permissions: http://www.hackersdelight.org/permissions.htm https://bugzilla.gnome.org/show_bug.cgi?id=791837 --- extensions/CIE.c | 61 +++++++++++------------------------------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/extensions/CIE.c b/extensions/CIE.c index bd9e836..b6fa513 100644 --- a/extensions/CIE.c +++ b/extensions/CIE.c @@ -565,61 +565,28 @@ lchaba_to_rgba (const Babl *conversion,char *src, /******** begin floating point RGB/CIE color space conversions ********/ -/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - * Debugged and optimized by Bruce D. Evans. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== +/* origin: http://www.hackersdelight.org/hdcodetxt/acbrt.c.txt + * permissions: http://www.hackersdelight.org/permissions.htm */ /* _cbrtf(x) * Return cube root of x */ -#include #include -static const unsigned -B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */ -B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ - -static inline float _cbrtf(float x) +static inline float +_cbrtf (float x) { - float r,T; - union {float f; uint32_t i;} u = {x}; - uint32_t hx = u.i & 0x7fffffff; - - if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */ - return x + x; - - /* rough cbrt to 5 bits */ - if (hx < 0x00800000) { /* zero or subnormal? */ - if (hx == 0) - return x; /* cbrt(+-0) is itself */ - u.f = x*0x1p24f; - hx = u.i & 0x7fffffff; - hx = hx/3 + B2; - } else - hx = hx/3 + B1; - u.i &= 0x80000000; - u.i |= hx; - - T = u.f; - r = T*T*T; - T = T*((float)x+x+r)/(x+r+r); - - r = T*T*T; - T = T*((float)x+x+r)/(x+r+r); - - return T; + union { float f; uint32_t i; } u = { x }; + + u.i = u.i / 4 + u.i / 16; + u.i = u.i + u.i / 16; + u.i = u.i + u.i / 256; + u.i = 0x2a5137a0 + u.i; + u.f = 0.33333333f * (2.0f * u.f + x / (u.f * u.f)); + u.f = 0.33333333f * (2.0f * u.f + x / (u.f * u.f)); + + return u.f; } static inline float -- 2.30.2